################################################################################
## Launching Revolution: Social Media and the Egyptian Uprising's First Movers
## BJPoLS 2018
## Clarke & Kocak
## Survey Analyses Replication Code
################################################################################

## Load packages
library(CBPS)
library(SVMMatch)
library(ebal)
library(MatchIt)
library(corrplot)
library(boot)
library(xtable)
library(stargazer)

## Load data

## Note: AB data can be downloaded from the Arab Barometer's website
## http://www.arabbarometer.org/instruments-and-data-files
## The survey used is Egypt Wave II

## The TDS data has been made available on Dataverse under a creative commons license 

AB <- read.csv("AB_Egypt.csv")
TDS <- read.csv("TDS.csv")

###################################
## 1. AB Data Cleaning
###################################

## Subset AB data
## Relevant questions
AB.sub <- AB[,c(6,7,160:164,167:175,177:180,185:186,191:192,377:409, 442:444, 469)]

## Rename key variables
colnames(AB.sub)[1]<- c("gov")
colnames(AB.sub)[8]<- c("internet")
colnames(AB.sub)[12]<- c("social")
colnames(AB.sub)[13]<- c("dialogue")
colnames(AB.sub)[14]<- c("facebook")
colnames(AB.sub)[15]<- c("twitter")
colnames(AB.sub)[17]<- c("charity")
colnames(AB.sub)[18]<- c("union")
colnames(AB.sub)[19]<- c("youthgr")
colnames(AB.sub)[20]<- c("tribe")
colnames(AB.sub)[21]<- c("party")
colnames(AB.sub)[26]<- c("protest")
colnames(AB.sub)[27]<- c("jan25")
colnames(AB.sub)[28]<- c("jan28")
colnames(AB.sub)[29]<- c("feb2")
colnames(AB.sub)[30]<- c("feb4")
colnames(AB.sub)[31]<- c("feb6")
colnames(AB.sub)[32]<- c("feb11")
colnames(AB.sub)[33]<- c("assistRev")
colnames(AB.sub)[34]<- c("supportInt")
colnames(AB.sub)[35]<- c("networkRev")
colnames(AB.sub)[36]<- c("mediaSource")
colnames(AB.sub)[58]<- c("age")
colnames(AB.sub)[59]<- c("sex")
colnames(AB.sub)[60]<- c("edu")


## Create education variables
AB.sub$primary <- ifelse(AB.sub$edu != "1. illiterate/literate" & 
                           AB.sub$edu != "0. missing" , 1, 0)
AB.sub$preparatory <- ifelse(AB.sub$edu != "1. illiterate/literate" & 
                               AB.sub$edu != "2. elementary" &  
                               AB.sub$edu != "0. missing" , 1, 0)
AB.sub$secondary <- ifelse(AB.sub$edu != "1. illiterate/literate" & 
                             AB.sub$edu != "2. elementary" &  
                             AB.sub$edu != "3. preparatory/basic" &  
                             AB.sub$edu != "0. missing" , 1, 0)
AB.sub$college <- ifelse(AB.sub$edu == "6. ba" | 
                           AB.sub$edu == "7. ma and above" , 1, 0)

## Fix other controls
AB.sub$sex <- ifelse(AB.sub$sex == "1. male", 1, 2)
AB.sub$internet <- ifelse(AB.sub$internet != "5. i do not use the internet" &
                            AB.sub$internet != "0. missing", 1, 0)
AB.sub$facebook <- ifelse(AB.sub$facebook == "1. yes", 1, 0)
AB.sub$facebook[is.na(AB.sub$facebook)] <- 0
AB.sub$twitter <- ifelse(AB.sub$twitter == "1. yes", 1, 0)
AB.sub$twitter[is.na(AB.sub$twitter)] <- 0
AB.sub$SM <- ifelse(AB.sub$twitter == 1 | AB.sub$facebook == 1, 1, 0)
AB.sub$charity <- ifelse(AB.sub$charity == "1. yes", 1, 0)
AB.sub$union <- ifelse(AB.sub$union == "1. yes", 1, 0)
AB.sub$youthgr <- ifelse(AB.sub$youthgr == "1. yes", 1, 0)
AB.sub$tribe <- ifelse(AB.sub$tribe == "1. yes", 1, 0)
AB.sub$party <- ifelse(AB.sub$party == "1. yes", 1, 0)
AB.sub$female <- ifelse(AB.sub$sex == 2, 1, 0)

## Create urban variable
AB.sub$urban <- ifelse(AB.sub$q13 == "1. urban", 1, 0)

## Create binary age variable
AB.sub$agegroup <- ifelse(AB.sub$age > 30, 1, 0)

## Ceate revolution variables
AB.sub$protest <- ifelse(AB.sub$protest == "1. yes", 1, 0)
AB.sub$jan25 <- ifelse(AB.sub$jan25 == "1. yes", 1, 0)
AB.sub$jan25[is.na(AB.sub$jan25)] <- 0
AB.sub$jan28 <- ifelse(AB.sub$jan28 == "1. yes", 1, 0)
AB.sub$jan28[is.na(AB.sub$jan28)] <- 0
AB.sub$first4 <- ifelse(AB.sub$jan28 == 1 | AB.sub$jan25 == 1, 1, 0)
AB.sub$feb2 <- ifelse(AB.sub$feb2 == "1. yes", 1, 0)
AB.sub$feb4 <- ifelse(AB.sub$feb4 == "1. yes", 1, 0)
AB.sub$feb6 <- ifelse(AB.sub$feb6 == "1. yes", 1, 0)
AB.sub$feb11 <- ifelse(AB.sub$feb11 == "1. yes", 1, 0)
AB.sub$assistRev <- ifelse(AB.sub$assistRev == "1. yes", 1, 0)
AB.sub$supportInt <- ifelse(AB.sub$supportInt == "1. yes", 1, 0)
AB.sub$networkRev <- ifelse(AB.sub$networkRev == "1. yes", 1, 0)
AB.sub$TVRev <- ifelse(AB.sub$mediaSource == "1. tv", 1, 0)
AB.sub$radioRev <- ifelse(AB.sub$mediaSource == "2. radio", 1, 0)
AB.sub$printRev <- ifelse(AB.sub$mediaSource == "3. newspapers (the daily press)", 1, 0)
AB.sub$internetRev <- ifelse(AB.sub$mediaSource == "4. the internet", 1, 0)
AB.sub$facebookRev <- ifelse(AB.sub$mediaSource == "5. facebook", 1, 0)
AB.sub$emailRev <- ifelse(AB.sub$mediaSource == "7. e-mail", 1, 0)
AB.sub$netRev <-ifelse(AB.sub$internetRev == 1 | AB.sub$facebookRev == 1, 1, 0)

## Create additional subsets
AB.sub.prot <- AB.sub[AB.sub$protest == 1,]
AB.sub.day1 <- AB.sub[AB.sub$jan25 == 1,]

###################################
## 2. TDS Data Cleaning
###################################

## Rename key variables
colnames(TDS)[8]<- c("internet")
colnames(TDS)[208]<- c("facebook")
colnames(TDS)[164]<- c("twitter")
colnames(TDS)[10]<- c("group")
colnames(TDS)[4]<- c("age")
colnames(TDS)[5]<- c("sex")
colnames(TDS)[7]<- c("edu")

## Pull out relevant associational groups
TDS$union <- ifelse(TDS$group == 1, 1, 0)
TDS$party <- ifelse(TDS$group == 2, 1, 0)
TDS$charity <- ifelse(TDS$group == 3, 1, 0)

## Pull out educational categories
TDS$primary <- ifelse(TDS$edu > 0, 1, 0)
TDS$preparatory <- ifelse(TDS$edu > 1, 1, 0)
TDS$secondary <- ifelse(TDS$edu > 2, 1, 0)
TDS$college <- ifelse(TDS$edu > 4, 1, 0)

## Fix other controls
TDS$facebook <- ifelse(TDS$facebook == 1, 1, 0)
TDS$twitter <- ifelse(TDS$twitter == 1, 1, 0)
TDS$internet <- ifelse(TDS$internet == 1, 1, 0)
TDS$female <- ifelse(TDS$sex == 2, 1, 0)

## Create binary age variable
TDS$agegroup <- ifelse(TDS$age > 30, 1, 0)

## Other variables
TDS$day1 <- ifelse(TDS$q601 == 2, 1, 0)
TDS$preprot <- ifelse(TDS$q602 == 2, 1, 0)
TDS$internet.phone <- ifelse(TDS$q106 == 1, 1, 0)
TDS$male <- 1 - TDS$female
colnames(TDS)[7]<- c("education")
TDS$blogsUse <- ifelse(TDS$q271 == 1 | TDS$q271 == 2, 1, 0)
TDS$emailUse <- ifelse(TDS$q281 == 1, 1, 0)
TDS$phoneUse <- ifelse(TDS$q211 == 1, 1, 0)
TDS$printUse <- ifelse(TDS$q241 == 1, 1, 0)
TDS$TVUse <- ifelse(TDS$q221 == 1, 1, 0)
TDS$textUse <- ifelse(TDS$q201 == 1, 1, 0)
TDS$blogsJan25 <- ifelse(TDS$q272 == 1 | TDS$q272 == 2, 1, 0)
TDS$emailJan25 <- ifelse(TDS$q282 == 1 | TDS$q282 == 2, 1, 0)
TDS$phoneJan25 <- ifelse(TDS$q212 == 1 | TDS$q212 == 2, 1, 0)
TDS$printJan25 <- ifelse(TDS$q242 == 1 | TDS$q242 == 2, 1, 0)
TDS$TVJan25 <- ifelse(TDS$q222 == 1 | TDS$q222 == 2, 1, 0)
TDS$textJan25 <- ifelse(TDS$q202 == 1 | TDS$q202 == 2, 1, 0)
TDS$twitterJan25 <- ifelse(TDS$q252 == 1 | TDS$q252 == 2, 1, 0)
TDS$facebookJan25 <- ifelse(TDS$q262 == 1 | TDS$q262 == 2, 1, 0)
TDS$f2fJan25 <- ifelse(TDS$q292 == 1 | TDS$q292 == 2, 1, 0)

## Remove nas
omit <- c(which(is.na(TDS$college) == TRUE), which(is.na(TDS$female) == TRUE),
          which(is.na(TDS$internet) == TRUE))
TDS <- TDS[-omit,]

##########################################################
## 3. Produce values in Table 1: Internet & Social Media Use Among Egyptians
## and conduct prop tests
##########################################################

## Internet use
sum(AB.sub.day1$internet) / nrow(AB.sub.day1)
sum(AB.sub.prot$internet[AB.sub.prot$jan25 == 0]) / nrow(AB.sub.prot[AB.sub.prot$jan25 == 0,])
sum(AB.sub$internet[AB.sub$protest == 0]) / nrow(AB.sub[AB.sub$protest == 0,])

## Facebook use
sum(AB.sub.day1$facebook) / nrow(AB.sub.day1)
sum(AB.sub.prot$facebook[AB.sub.prot$jan25 == 0]) / nrow(AB.sub.prot[AB.sub.prot$jan25 == 0,])
sum(AB.sub$facebook[AB.sub$protest == 0]) / nrow(AB.sub[AB.sub$protest == 0,])

## Twitter use
sum(AB.sub.day1$twitter) / nrow(AB.sub.day1)
sum(AB.sub.prot$twitter[AB.sub.prot$jan25 == 0]) / nrow(AB.sub.prot[AB.sub.prot$jan25 == 0,])
sum(AB.sub$twitter[AB.sub$protest == 0]) / nrow(AB.sub[AB.sub$protest == 0,])

## Support revolution online
sum.e <- sum(AB.sub$supportInt[AB.sub$protest == 0])
sum.p <- sum(AB.sub.prot$supportInt[AB.sub.prot$jan25 == 0])
sum.f <- sum(AB.sub.day1$supportInt)
n.e <- nrow(AB.sub[AB.sub$protest == 0,])
n.p <- nrow(AB.sub.prot[AB.sub.prot$jan25 == 0,])
n.f <- nrow(AB.sub.day1)

sum.e/n.e
sum.p/n.p
sum.f/n.f

## Proptests
n.e <- nrow(AB.sub[AB.sub$protest == 0,])
n.p <- nrow(AB.sub.prot[AB.sub.prot$jan25 == 0,])

prop.test(c(sum(AB.sub$internet[AB.sub$protest == 0]), 
            sum(AB.sub.prot$internet[AB.sub.prot$jan25 == 0])), 
          c(n.e, n.p), conf.level = 0.95, 
          alternative = "two.sided", correct = FALSE)

prop.test(c(sum(AB.sub$facebook[AB.sub$protest == 0]), 
            sum(AB.sub.prot$facebook[AB.sub.prot$jan25 == 0])), 
          c(n.e, n.p), conf.level = 0.95, 
          alternative = "two.sided", correct = FALSE)

prop.test(c(sum(AB.sub$twitter[AB.sub$protest == 0]), 
            sum(AB.sub.prot$twitter[AB.sub.prot$jan25 == 0])), 
          c(n.e, n.p), conf.level = 0.95, 
          alternative = "two.sided", correct = FALSE)

prop.test(c(sum.p, sum.f), c(n.p, n.f), conf.level = 0.95, 
          alternative = "two.sided", correct = FALSE)

prop.test(c(sum.p, sum.e), c(n.p, n.e), conf.level = 0.95, 
          alternative = "two.sided", correct = FALSE)

##########################################################
## 4. Regressions to produce Table 2: Effect of Internet and 
## Social Media Use on Protest Participation
##########################################################

modAB1 <- glm(jan25 ~ age + female + college + urban + internet + SM, 
              data = AB.sub.prot, family = "binomial")

modAB2 <- glm(jan25 ~ age + female + college + urban + internet + supportInt, 
              data = AB.sub.prot, family = "binomial")

modAB3 <- glm(protest ~ age + female + college + urban + internet + SM, 
              data = AB.sub, family = "binomial")

modAB4 <- glm(protest ~ age + female + college + urban + internet + supportInt, 
              data = AB.sub, family = "binomial")

stargazer(modAB1, modAB2, modAB3, modAB4)

###################################
## 5. Create Figure 5: TDS Sample vs. Arab Barometer
###################################

## Subset data frame: AB, Urban Egyptians, and Urban Protesters
AB.sub.U <- subset(AB.sub, AB.sub$urban == "1")
AB.sub.U.pr <- subset(AB.sub.U, AB.sub.U$protest == "1")

## Create new weight vectors for these subsets
AB.sub.U$wt.U <- (AB.sub.U$wt)*(nrow(AB.sub.U) / sum(AB.sub.U$wt))
AB.sub.U.pr$wt.U.pr <- (AB.sub.U.pr$wt)*(nrow(AB.sub.U.pr) / sum(AB.sub.U.pr$wt))

## Determine means and SEs for each comparison category
n.TDS <- nrow(TDS)
n.AB.U <- nrow(AB.sub.U)
n.AB.U.pr <- nrow(AB.sub.U.pr)

## Female
## TDS
mean.female.TDS <- mean(na.omit(TDS$female))
se.female.TDS <- sqrt(mean.female.TDS * (1 - mean.female.TDS) / n.TDS)
lower.female.TDS <- mean.female.TDS - qnorm(0.975) * se.female.TDS
upper.female.TDS <- mean.female.TDS + qnorm(0.975) * se.female.TDS

## Urban Egyptians
mean.female.AB.U <- mean(na.omit(AB.sub.U$female)*AB.sub.U$wt.U)
se.female.AB.U <- sqrt(mean.female.AB.U * (1 - mean.female.AB.U) / n.AB.U)
lower.female.AB.U <- mean.female.AB.U - qnorm(0.975) * se.female.AB.U
upper.female.AB.U <- mean.female.AB.U + qnorm(0.975) * se.female.AB.U

## Urban Protesters
mean.female.AB.U.pr <- mean(na.omit(AB.sub.U.pr$female)*AB.sub.U.pr$wt.U.pr)
se.female.AB.U.pr <- sqrt(mean.female.AB.U.pr * (1 - mean.female.AB.U.pr) / n.AB.U.pr)
lower.female.AB.U.pr <- mean.female.AB.U.pr - qnorm(0.975) * se.female.AB.U.pr
upper.female.AB.U.pr <- mean.female.AB.U.pr + qnorm(0.975) * se.female.AB.U.pr

## College
## TDS
mean.college.TDS <- mean(na.omit(TDS$college))
se.college.TDS <- sqrt(mean.college.TDS * (1 - mean.college.TDS) / n.TDS)
lower.college.TDS <- mean.college.TDS - qnorm(0.975) * se.college.TDS
upper.college.TDS <- mean.college.TDS + qnorm(0.975) * se.college.TDS

## Urban Egyptians
mean.college.AB.U <- mean(na.omit(AB.sub.U$college)*AB.sub.U$wt.U)
se.college.AB.U <- sqrt(mean.college.AB.U * (1 - mean.college.AB.U) / n.AB.U)
lower.college.AB.U <- mean.college.AB.U - qnorm(0.975) * se.college.AB.U
upper.college.AB.U <- mean.college.AB.U + qnorm(0.975) * se.college.AB.U

## Urban Protesters
mean.college.AB.U.pr <- mean(na.omit(AB.sub.U.pr$college)*AB.sub.U.pr$wt.U.pr)
se.college.AB.U.pr <- sqrt(mean.college.AB.U.pr * (1 - mean.college.AB.U.pr) / n.AB.U.pr)
lower.college.AB.U.pr <- mean.college.AB.U.pr - qnorm(0.975) * se.college.AB.U.pr
upper.college.AB.U.pr <- mean.college.AB.U.pr + qnorm(0.975) * se.college.AB.U.pr

## Internet
## TDS
mean.internet.TDS <- mean(na.omit(TDS$internet))
se.internet.TDS <- sqrt(mean.internet.TDS * (1 - mean.internet.TDS) / n.TDS)
lower.internet.TDS <- mean.internet.TDS - qnorm(0.975) * se.internet.TDS
upper.internet.TDS <- mean.internet.TDS + qnorm(0.975) * se.internet.TDS

## Urban Egyptians
mean.internet.AB.U <- mean(na.omit(AB.sub.U$internet)*AB.sub.U$wt.U)
se.internet.AB.U <- sqrt(mean.internet.AB.U * (1 - mean.internet.AB.U) / n.AB.U)
lower.internet.AB.U <- mean.internet.AB.U - qnorm(0.975) * se.internet.AB.U
upper.internet.AB.U <- mean.internet.AB.U + qnorm(0.975) * se.internet.AB.U

## Urban Protesters
mean.internet.AB.U.pr <- mean(na.omit(AB.sub.U.pr$internet)*AB.sub.U.pr$wt.U.pr)
se.internet.AB.U.pr <- sqrt(mean.internet.AB.U.pr * (1 - mean.internet.AB.U.pr) / n.AB.U.pr)
lower.internet.AB.U.pr <- mean.internet.AB.U.pr - qnorm(0.975) * se.internet.AB.U.pr
upper.internet.AB.U.pr <- mean.internet.AB.U.pr + qnorm(0.975) * se.internet.AB.U.pr

## Charity
## TDS
mean.charity.TDS <- mean(na.omit(TDS$charity))
se.charity.TDS <- sqrt(mean.charity.TDS * (1 - mean.charity.TDS) / n.TDS)
lower.charity.TDS <- mean.charity.TDS - qnorm(0.975) * se.charity.TDS
upper.charity.TDS <- mean.charity.TDS + qnorm(0.975) * se.charity.TDS

## Urban Egyptians
mean.charity.AB.U <- mean(na.omit(AB.sub.U$charity)*AB.sub.U$wt.U)
se.charity.AB.U <- sqrt(mean.charity.AB.U * (1 - mean.charity.AB.U) / n.AB.U)
lower.charity.AB.U <- mean.charity.AB.U - qnorm(0.975) * se.charity.AB.U
upper.charity.AB.U <- mean.charity.AB.U + qnorm(0.975) * se.charity.AB.U

## Urban Protesters
mean.charity.AB.U.pr <- mean(na.omit(AB.sub.U.pr$charity)*AB.sub.U.pr$wt.U.pr)
se.charity.AB.U.pr <- sqrt(mean.charity.AB.U.pr * (1 - mean.charity.AB.U.pr) / n.AB.U.pr)
lower.charity.AB.U.pr <- mean.charity.AB.U.pr - qnorm(0.975) * se.charity.AB.U.pr
upper.charity.AB.U.pr <- mean.charity.AB.U.pr + qnorm(0.975) * se.charity.AB.U.pr

## Union
## TDS
mean.union.TDS <- mean(na.omit(TDS$union))
se.union.TDS <- sqrt(mean.union.TDS * (1 - mean.union.TDS) / n.TDS)
lower.union.TDS <- mean.union.TDS - qnorm(0.975) * se.union.TDS
upper.union.TDS <- mean.union.TDS + qnorm(0.975) * se.union.TDS

## Urban Egyptians
mean.union.AB.U <- mean(na.omit(AB.sub.U$union)*AB.sub.U$wt.U)
se.union.AB.U <- sqrt(mean.union.AB.U * (1 - mean.union.AB.U) / n.AB.U)
lower.union.AB.U <- mean.union.AB.U - qnorm(0.975) * se.union.AB.U
upper.union.AB.U <- mean.union.AB.U + qnorm(0.975) * se.union.AB.U

## Urban Protesters
mean.union.AB.U.pr <- mean(na.omit(AB.sub.U.pr$union)*AB.sub.U.pr$wt.U.pr)
se.union.AB.U.pr <- sqrt(mean.union.AB.U.pr * (1 - mean.union.AB.U.pr) / n.AB.U.pr)
lower.union.AB.U.pr <- mean.union.AB.U.pr - qnorm(0.975) * se.union.AB.U.pr
upper.union.AB.U.pr <- mean.union.AB.U.pr + qnorm(0.975) * se.union.AB.U.pr

## Age
## TDS
mean.age.TDS <- mean(na.omit(TDS$age))
se.age.TDS <- sd(na.omit(TDS$age)) / sqrt(n.TDS)
lower.age.TDS <- mean.age.TDS - qnorm(0.975) * se.age.TDS
upper.age.TDS <- mean.age.TDS + qnorm(0.975) * se.age.TDS

## Urban Egyptians
mean.age.AB.U <- mean(na.omit(AB.sub.U$age)*AB.sub.U$wt.U)
se.age.AB.U <- sd(na.omit(AB.sub.U$age)*AB.sub.U$wt.U) / sqrt(n.AB.U)
lower.age.AB.U <- mean.age.AB.U - qnorm(0.975) * se.age.AB.U
upper.age.AB.U <- mean.age.AB.U + qnorm(0.975) * se.age.AB.U

## Urban Protesters
mean.age.AB.U.pr <- mean(na.omit(AB.sub.U.pr$age)*AB.sub.U.pr$wt.U.pr)
se.age.AB.U.pr <- sd(na.omit(AB.sub.U.pr$age)*AB.sub.U.pr$wt.U.pr) / sqrt(n.AB.U.pr)
lower.age.AB.U.pr <- mean.age.AB.U.pr - qnorm(0.975) * se.age.AB.U.pr
upper.age.AB.U.pr <- mean.age.AB.U.pr + qnorm(0.975) * se.age.AB.U.pr

## Age group
## TDS
mean.agegroup.TDS <- mean(na.omit(TDS$agegroup))
se.agegroup.TDS <- sqrt(mean.agegroup.TDS * (1 - mean.agegroup.TDS) / n.TDS)
lower.agegroup.TDS <- mean.agegroup.TDS - qnorm(0.975) * se.agegroup.TDS
upper.agegroup.TDS <- mean.agegroup.TDS + qnorm(0.975) * se.agegroup.TDS

## Urban Egyptians
mean.agegroup.AB.U <- mean(na.omit(AB.sub.U$agegroup)*AB.sub.U$wt.U)
se.agegroup.AB.U <- sqrt(mean.agegroup.AB.U * (1 - mean.agegroup.AB.U) / n.AB.U)
lower.agegroup.AB.U <- mean.agegroup.AB.U - qnorm(0.975) * se.agegroup.AB.U
upper.agegroup.AB.U <- mean.agegroup.AB.U + qnorm(0.975) * se.agegroup.AB.U

## Urban Protesters
mean.agegroup.AB.U.pr <- mean(na.omit(AB.sub.U.pr$agegroup)*AB.sub.U.pr$wt.U.pr)
se.agegroup.AB.U.pr <- sqrt(mean.agegroup.AB.U.pr * (1 - mean.agegroup.AB.U.pr) / n.AB.U.pr)
lower.agegroup.AB.U.pr <- mean.agegroup.AB.U.pr - qnorm(0.975) * se.agegroup.AB.U.pr
upper.agegroup.AB.U.pr <- mean.agegroup.AB.U.pr + qnorm(0.975) * se.agegroup.AB.U.pr

## dot chart comparing means
## mean vectors
Urban.means <- c(mean.agegroup.AB.U, mean.female.AB.U, mean.college.AB.U, 
                 mean.internet.AB.U, mean.union.AB.U, mean.charity.AB.U)
UrbanProtest.means <- c(mean.agegroup.AB.U.pr, mean.female.AB.U.pr, mean.college.AB.U.pr, 
                        mean.internet.AB.U.pr, mean.union.AB.U.pr, mean.charity.AB.U.pr)
TDS.means <- c(mean.agegroup.TDS, mean.female.TDS, mean.college.TDS, mean.internet.TDS, 
               mean.union.TDS, mean.charity.TDS)
data <- cbind(TDS.means, UrbanProtest.means, Urban.means)
data <- as.data.frame(data)
row.names(data) <- c("Over 30 Years Old","Female", "BA Degree or Above", "Internet User", 
                     "Union Member", "Charitable Assn Member")

## plot
dotchart(t(data), color=c("blue","black", "black"),
         cex=0.8, pch=16, xlim=c(0,1),
         labels = c('TDS', 'AB (Urban Protesters)', 'AB (Urban Egyptians)'), 
         xlab = "Proportion")
segments(lower.charity.TDS, 1:1, upper.charity.TDS, 1:1, col="blue")
segments(lower.charity.AB.U.pr, 2:2, upper.charity.AB.U.pr, 2:2, col="black")
segments(lower.charity.AB.U, 3:3, upper.charity.AB.U, 3:3, col="black")
segments(lower.union.TDS, 6:6, upper.union.TDS, 6:6, col="blue")
segments(lower.union.AB.U.pr, 7:7, upper.union.AB.U.pr, 7:7, col="black")
segments(lower.union.AB.U, 8:8, upper.union.AB.U, 8:8, col="black")
segments(lower.internet.TDS, 11:11, upper.internet.TDS, 11:11, col="blue")
segments(lower.internet.AB.U.pr, 12:12, upper.internet.AB.U.pr, 12:12, col="black")
segments(lower.internet.AB.U, 13:13, upper.internet.AB.U, 13:13, col="black")
segments(lower.college.TDS, 16:16, upper.college.TDS, 16:16, col="blue")
segments(lower.college.AB.U.pr, 17:17, upper.college.AB.U.pr, 17:17, col="black")
segments(lower.college.AB.U, 18:18, upper.college.AB.U, 18:18, col="black")
segments(lower.female.TDS, 21:21, upper.female.TDS, 21:21, col="blue")
segments(lower.female.AB.U.pr, 22:22, upper.female.AB.U.pr, 22:22, col="black")
segments(lower.female.AB.U, 23:23, upper.female.AB.U, 23:23, col="black")
segments(lower.agegroup.TDS, 26:26, upper.agegroup.TDS, 26:26, col="blue")
segments(lower.agegroup.AB.U.pr, 27:27, upper.agegroup.AB.U.pr, 27:27, col="black")
segments(lower.agegroup.AB.U, 28:28, upper.agegroup.AB.U, 28:28, col="black")
dev.off()

########################################
## 6. Construct weights for TDS sample
########################################

## Original weighting method
TDS.sample <- subset.data.frame(TDS, select = c(age, college, internet, female))
TDS.sample$wt <- 1
TDS.sample$AB <- 0

AB.U.sample <- subset.data.frame(AB.sub.U.pr, 
                                 select = c(age, college, internet, female, wt))
AB.U.sample$AB <- 1
AB.U.sample$wt <- (nrow(AB.U.sample) / sum(AB.U.sample$wt)) * AB.U.sample$wt
sample.U <- rbind(TDS.sample, AB.U.sample)

## Run models to find Pr(AB | AB + TDS), with AB weights
y1 <- as.matrix(sample.U$AB)
y0 <- as.matrix(1- sample.U$AB)
y <- cbind(y1, y0)
fit.U <- glm(y ~ age + college + internet + female, 
             data = sample.U, family = "binomial", weights = wt)
summary(fit.U)

## Generate predicted probabilities of being in the AB sample for each TDS observation
predictions.U <- predict(fit.U, newdata = TDS, type = "response", se = TRUE)

## Generate weight vector based on p/1-p
TDS$weights.U <- (predictions.U$fit / (1 - predictions.U$fit)) * 
  (nrow(TDS.sample) / nrow(AB.U.sample))

## Other weighting methods
TDS.match <- subset.data.frame(TDS, select = c(age, college, internet, female))
TDS.match$wt <- 1
TDS.match$treat <- 0

AB.match <- subset.data.frame(AB.sub.U.pr, 
                              select = c(age, college, internet, female, wt))
AB.match$treat <- 1
AB.match$wt <- (nrow(AB.match) / sum(AB.match$wt)) * AB.match$wt
match <- rbind(TDS.match, AB.match)

## Run models to find Pr(AB | AB + TDS), with AB weights
y1 <- as.matrix(match$treat)
y0 <- as.matrix(1- match$treat)
y <- cbind(y1, y0)
fit.match <- glm(y ~ age + college + internet + female, 
                 data = match, family = "binomial", weights = wt)
summary(fit.match)

## Creat matrices
treat <- as.matrix(match$treat)
X <- as.matrix(subset(match, select = c(age, college, internet, female)))

## Create vector of ids for untreated and treated
id.untreated <- which(match$treat == 0)
id.treated <- which(match$treat == 1)

## CBPS 1:1
fit1 <- CBPS(treat ~ age + college + internet + female, 
             data = match, weights = wt, ATT = TRUE)

match.CBPS1 <- matchit(treat ~ fitted(fit1), method = "nearest", data = match,
                       replace = TRUE, ratio = 1)

CBPSweights <- match.CBPS1$weights[id.untreated]
CBPSweights <- (nrow(TDS) / sum(CBPSweights)) * CBPSweights
TDS$CBPSweights <- CBPSweights

## Propensity score 1:1
match.prop1 <- matchit(treat ~ fitted(fit.match), method = "nearest", data = match,
                       replace = TRUE, ratio = 1)

weights.prop1 <- match.prop1$weights[id.untreated]
weights.prop1 <- (nrow(TDS) / sum(weights.prop1)) * weights.prop1
TDS$propweights <- weights.prop1

## Entropy balance
match.eb <- ebalance(treat, X)
weights.eb <- match.eb$w
weights.eb <- (nrow(TDS) / sum(weights.eb)) * weights.eb
TDS$entweights <- weights.eb

## SVMMatch
match.SVM <- svmmatch(treat, X)
weights.SVM <- match.SVM$bal.wts
weights.SVM <- colMeans(weights.SVM)
weights.SVM <- weights.SVM[id.untreated]
weights.SVM <- (nrow(TDS) / sum(weights.SVM)) * weights.SVM
TDS$SVMweights <- weights.SVM
table(CBPSweights)

################################################################################### 
## 7. Create New TDS means using weights - for each variable
###################################################################################

## Original method
## female
mean.female.TDS.w <- mean(na.omit(TDS$female*TDS$weights.U))
se.female.TDS.w <- sqrt(mean.female.TDS.w * (1 - mean.female.TDS.w) / n.TDS)
lower.female.TDS.w <- mean.female.TDS.w - qnorm(0.975) * se.female.TDS.w
upper.female.TDS.w <- mean.female.TDS.w + qnorm(0.975) * se.female.TDS.w

## college
mean.college.TDS.w <- mean(na.omit(TDS$college*TDS$weights.U))
se.college.TDS.w <- sqrt(mean.college.TDS.w * (1 - mean.college.TDS.w) / n.TDS)
lower.college.TDS.w <- mean.college.TDS.w - qnorm(0.975) * se.college.TDS.w
upper.college.TDS.w <- mean.college.TDS.w + qnorm(0.975) * se.college.TDS.w

## internet
mean.internet.TDS.w <- mean(na.omit(TDS$internet*TDS$weights.U))
se.internet.TDS.w <- sqrt(mean.internet.TDS.w * (1 - mean.internet.TDS.w) / n.TDS)
lower.internet.TDS.w <- mean.internet.TDS.w - qnorm(0.975) * se.internet.TDS.w
upper.internet.TDS.w <- mean.internet.TDS.w + qnorm(0.975) * se.internet.TDS.w

## agegroup
mean.agegroup.TDS.w <- mean(na.omit(TDS$agegroup*TDS$weights.U))
se.agegroup.TDS.w <- sd(na.omit(TDS$agegroup)) / sqrt(n.TDS)
lower.agegroup.TDS.w <- mean.agegroup.TDS.w - qnorm(0.975) * se.agegroup.TDS.w
upper.agegroup.TDS.w <- mean.agegroup.TDS.w + qnorm(0.975) * se.agegroup.TDS.w

## Create mean vector
TDS.means.w <- c(mean.agegroup.TDS.w, mean.female.TDS.w, mean.college.TDS.w, 
                        mean.internet.TDS.w)

## CBPS
## female
mean.female.TDS.CBPS <- mean(na.omit(TDS$female*TDS$CBPSweights))
se.female.TDS.CBPS <- sqrt(mean.female.TDS.CBPS * (1 - mean.female.TDS.CBPS) / n.TDS)
lower.female.TDS.CBPS <- mean.female.TDS.CBPS - qnorm(0.975) * se.female.TDS.CBPS
upper.female.TDS.CBPS <- mean.female.TDS.CBPS + qnorm(0.975) * se.female.TDS.CBPS

## college
mean.college.TDS.CBPS <- mean(na.omit(TDS$college*TDS$CBPSweights))
se.college.TDS.CBPS <- sqrt(mean.college.TDS.CBPS * (1 - mean.college.TDS.CBPS) / n.TDS)
lower.college.TDS.CBPS <- mean.college.TDS.CBPS - qnorm(0.975) * se.college.TDS.CBPS
upper.college.TDS.CBPS <- mean.college.TDS.CBPS + qnorm(0.975) * se.college.TDS.CBPS

## internet
mean.internet.TDS.CBPS <- mean(na.omit(TDS$internet*TDS$CBPSweights))
se.internet.TDS.CBPS <- sqrt(mean.internet.TDS.CBPS * (1 - mean.internet.TDS.CBPS) / n.TDS)
lower.internet.TDS.CBPS <- mean.internet.TDS.CBPS - qnorm(0.975) * se.internet.TDS.CBPS
upper.internet.TDS.CBPS <- mean.internet.TDS.CBPS + qnorm(0.975) * se.internet.TDS.CBPS

## agegroup
mean.agegroup.TDS.CBPS <- mean(na.omit(TDS$agegroup*TDS$CBPSweights))
se.agegroup.TDS.CBPS <- sd(na.omit(TDS$agegroup)) / sqrt(n.TDS)
lower.agegroup.TDS.CBPS <- mean.agegroup.TDS.CBPS - qnorm(0.975) * se.agegroup.TDS.CBPS
upper.agegroup.TDS.CBPS <- mean.agegroup.TDS.CBPS + qnorm(0.975) * se.agegroup.TDS.CBPS

## Create mean vector
TDS.means.CBPS <- c(mean.agegroup.TDS.CBPS, mean.female.TDS.CBPS, mean.college.TDS.CBPS, 
                 mean.internet.TDS.CBPS)

## Propensity score
## female
mean.female.TDS.prop <- mean(na.omit(TDS$female*TDS$propweights))
se.female.TDS.prop <- sqrt(mean.female.TDS.prop * (1 - mean.female.TDS.prop) / n.TDS)
lower.female.TDS.prop <- mean.female.TDS.prop - qnorm(0.975) * se.female.TDS.prop
upper.female.TDS.prop <- mean.female.TDS.prop + qnorm(0.975) * se.female.TDS.prop

## college
mean.college.TDS.prop <- mean(na.omit(TDS$college*TDS$propweights))
se.college.TDS.prop <- sqrt(mean.college.TDS.prop * (1 - mean.college.TDS.prop) / n.TDS)
lower.college.TDS.prop <- mean.college.TDS.prop - qnorm(0.975) * se.college.TDS.prop
upper.college.TDS.prop <- mean.college.TDS.prop + qnorm(0.975) * se.college.TDS.prop

## internet
mean.internet.TDS.prop <- mean(na.omit(TDS$internet*TDS$propweights))
se.internet.TDS.prop <- sqrt(mean.internet.TDS.prop * (1 - mean.internet.TDS.prop) / n.TDS)
lower.internet.TDS.prop <- mean.internet.TDS.prop - qnorm(0.975) * se.internet.TDS.prop
upper.internet.TDS.prop <- mean.internet.TDS.prop + qnorm(0.975) * se.internet.TDS.prop

## agegroup
mean.agegroup.TDS.prop <- mean(na.omit(TDS$agegroup*TDS$propweights))
se.agegroup.TDS.prop <- sd(na.omit(TDS$agegroup)) / sqrt(n.TDS)
lower.agegroup.TDS.prop <- mean.agegroup.TDS.prop - qnorm(0.975) * se.agegroup.TDS.prop
upper.agegroup.TDS.prop <- mean.agegroup.TDS.prop + qnorm(0.975) * se.agegroup.TDS.prop

## Create mean vector
TDS.means.prop <- c(mean.agegroup.TDS.prop, mean.female.TDS.prop, mean.college.TDS.prop, 
                    mean.internet.TDS.prop)

## Entropy balance
## female
mean.female.TDS.ent <- mean(na.omit(TDS$female*TDS$entweights))
se.female.TDS.ent <- sqrt(mean.female.TDS.ent * (1 - mean.female.TDS.ent) / n.TDS)
lower.female.TDS.ent <- mean.female.TDS.ent - qnorm(0.975) * se.female.TDS.ent
upper.female.TDS.ent <- mean.female.TDS.ent + qnorm(0.975) * se.female.TDS.ent

## college
mean.college.TDS.ent <- mean(na.omit(TDS$college*TDS$entweights))
se.college.TDS.ent <- sqrt(mean.college.TDS.ent * (1 - mean.college.TDS.ent) / n.TDS)
lower.college.TDS.ent <- mean.college.TDS.ent - qnorm(0.975) * se.college.TDS.ent
upper.college.TDS.ent <- mean.college.TDS.ent + qnorm(0.975) * se.college.TDS.ent

## internet
mean.internet.TDS.ent <- mean(na.omit(TDS$internet*TDS$entweights))
se.internet.TDS.ent <- sqrt(mean.internet.TDS.ent * (1 - mean.internet.TDS.ent) / n.TDS)
lower.internet.TDS.ent <- mean.internet.TDS.ent - qnorm(0.975) * se.internet.TDS.ent
upper.internet.TDS.ent <- mean.internet.TDS.ent + qnorm(0.975) * se.internet.TDS.ent

## agegroup
mean.agegroup.TDS.ent <- mean(na.omit(TDS$agegroup*TDS$entweights))
se.agegroup.TDS.ent <- sd(na.omit(TDS$agegroup)) / sqrt(n.TDS)
lower.agegroup.TDS.ent <- mean.agegroup.TDS.ent - qnorm(0.975) * se.agegroup.TDS.ent
upper.agegroup.TDS.ent <- mean.agegroup.TDS.ent + qnorm(0.975) * se.agegroup.TDS.ent

## Create mean vector
TDS.means.ent <- c(mean.agegroup.TDS.ent, mean.female.TDS.ent, mean.college.TDS.ent, 
                    mean.internet.TDS.ent)

## SVM
## female
mean.female.TDS.SVM <- mean(na.omit(TDS$female*TDS$SVMweights))
se.female.TDS.SVM <- sqrt(mean.female.TDS.SVM * (1 - mean.female.TDS.SVM) / n.TDS)
lower.female.TDS.SVM <- mean.female.TDS.SVM - qnorm(0.975) * se.female.TDS.SVM
upper.female.TDS.SVM <- mean.female.TDS.SVM + qnorm(0.975) * se.female.TDS.SVM

## college
mean.college.TDS.SVM <- mean(na.omit(TDS$college*TDS$SVMweights))
se.college.TDS.SVM <- sqrt(mean.college.TDS.SVM * (1 - mean.college.TDS.SVM) / n.TDS)
lower.college.TDS.SVM <- mean.college.TDS.SVM - qnorm(0.975) * se.college.TDS.SVM
upper.college.TDS.SVM <- mean.college.TDS.SVM + qnorm(0.975) * se.college.TDS.SVM

## internet
mean.internet.TDS.SVM <- mean(na.omit(TDS$internet*TDS$SVMweights))
se.internet.TDS.SVM <- sqrt(mean.internet.TDS.SVM * (1 - mean.internet.TDS.SVM) / n.TDS)
lower.internet.TDS.SVM <- mean.internet.TDS.SVM - qnorm(0.975) * se.internet.TDS.SVM
upper.internet.TDS.SVM <- mean.internet.TDS.SVM + qnorm(0.975) * se.internet.TDS.SVM

## agegroup
mean.agegroup.TDS.SVM <- mean(na.omit(TDS$agegroup*TDS$SVMweights))
se.agegroup.TDS.SVM <- sd(na.omit(TDS$agegroup)) / sqrt(n.TDS)
lower.agegroup.TDS.SVM <- mean.agegroup.TDS.SVM - qnorm(0.975) * se.agegroup.TDS.SVM
upper.agegroup.TDS.SVM <- mean.agegroup.TDS.SVM + qnorm(0.975) * se.agegroup.TDS.SVM

## Create mean vector
TDS.means.SVM <- c(mean.agegroup.TDS.SVM, mean.female.TDS.SVM, mean.college.TDS.SVM, 
                    mean.internet.TDS.SVM)

################################################################################### 
## 8. Create Figure 6: Adjusted TDS Sample vs. Arab Barometer
###################################################################################

data2 <- cbind(TDS.means.SVM, TDS.means.ent, TDS.means.prop, TDS.means.CBPS, 
               TDS.means.w, TDS.means[1:4], UrbanProtest.means[1:4])
data2 <- as.data.frame(data2)
row.names(data2) <- c("Over 30 Years Old","Female", "BA Degree or Above", "Internet User")

## plot
dotchart(t(data2), color=c("red", "red", "red", "red", "red", "blue", "black"),
         cex=0.8, pch=16, xlim=c(0,1),
         labels = c('TDS (SVM)', 'TDS (Ent Bal)', 'TDS (Prop)', 'TDS (CBPS)', 'TDS (Pred Prob)', 
                    'TDS (snowball)', 'AB (Urban Protesters)'),
         xlab = "Proportion")
segments(lower.internet.TDS.SVM, 1:1, upper.internet.TDS.SVM, 1:1, col="red")
segments(lower.internet.TDS.ent, 2:2, upper.internet.TDS.ent, 2:2, col="red")
segments(lower.internet.TDS.prop, 3:3, upper.internet.TDS.prop, 3:3, col="red")
segments(lower.internet.TDS.CBPS, 4:4, upper.internet.TDS.CBPS, 4:4, col="red")
segments(lower.internet.TDS.w, 5:5, upper.internet.TDS.w, 5:5, col="red")
segments(lower.internet.TDS, 6:6, upper.internet.TDS, 6:6, col="blue")
segments(lower.internet.AB.U.pr, 7:7, upper.internet.AB.U.pr, 7:7, col="black")
segments(lower.college.TDS.SVM, 10:10, upper.college.TDS.SVM, 10:10, col="red")
segments(lower.college.TDS.ent, 11:11, upper.college.TDS.ent, 11:11, col="red")
segments(lower.college.TDS.prop, 12:12, upper.college.TDS.prop, 12:12, col="red")
segments(lower.college.TDS.CBPS, 13:13, upper.college.TDS.CBPS, 13:13, col="red")
segments(lower.college.TDS.w, 14:14, upper.college.TDS.w, 14:14, col="red")
segments(lower.college.TDS, 15:15, upper.college.TDS, 15:15, col="blue")
segments(lower.college.AB.U.pr, 16:16, upper.college.AB.U.pr, 16:16, col="black")
segments(lower.female.TDS.SVM, 19:19, upper.female.TDS.SVM, 19:19, col="red")
segments(lower.female.TDS.ent, 20:20, upper.female.TDS.ent, 20:20, col="red")
segments(lower.female.TDS.prop, 21:21, upper.female.TDS.prop, 21:21, col="red")
segments(lower.female.TDS.CBPS, 22:22, upper.female.TDS.CBPS, 22:22, col="red")
segments(lower.female.TDS.w, 23:23, upper.female.TDS.w, 23:23, col="red")
segments(lower.female.TDS, 24:24, upper.female.TDS, 24:24, col="blue")
segments(lower.female.AB.U.pr, 25:25, upper.female.AB.U.pr, 25:25, col="black")
segments(lower.agegroup.TDS.SVM, 28:28, upper.agegroup.TDS.SVM, 28:28, col="red")
segments(lower.agegroup.TDS.ent, 29:29, upper.agegroup.TDS.ent, 29:29, col="red")
segments(lower.agegroup.TDS.prop, 30:30, upper.agegroup.TDS.prop, 30:30, col="red")
segments(lower.agegroup.TDS.CBPS, 31:31, upper.agegroup.TDS.CBPS, 31:31, col="red")
segments(lower.agegroup.TDS.w, 32:32, upper.agegroup.TDS.w, 32:32, col="red")
segments(lower.agegroup.TDS, 33:33, upper.agegroup.TDS, 33:33, col="blue")
segments(lower.agegroup.AB.U.pr, 34:34, upper.agegroup.AB.U.pr, 34:34, col="black")
dev.off()

## Show that entbal is best
data3 <- data2[,1:5]
data3 <- data3 - data2[,7]
data3 <- abs(data3)
data3 <- colSums(data3)
names(data3) <- c("SVM", "Ent Bal", "Prop", "CBPS", "Pred Prob")
data3 <- as.matrix(data3)
colnames(data3) <- "Distance from AB"
xtable(data3)

##########################################################
## 9. Produce Predicted Probabilities by Media Type 
## using Weighted (Entbal) and Unweighted Models
##########################################################

## unweighted model 
model <- glm(day1 ~ age + female + internet + phoneJan25 + TVJan25 + textJan25 + 
                 twitterJan25 + facebookJan25 + f2fJan25, data = TDS, family = "binomial")

## weighted model 
z1 <- as.matrix(TDS$day1)
z0 <- as.matrix(1- TDS$day1)
z <- cbind(z1, z0)
model.w <- glm(z ~ age + female + internet + phoneJan25 + TVJan25 + textJan25 + 
                 twitterJan25 + facebookJan25 + f2fJan25, 
               data = TDS, family = "binomial", weights = entweights)

summary(model)
summary(model.w)

## predict function for calculating predicted probs
my.predict <- function (data, beta, type){ 
  data <- format(data, type)
  pprob <- 1 / (1 + exp(-data %*% beta))
  return(pprob)
}

## boot function: function that draws ONE bootstrap sample, estimates the model on 
## that sample, then returns our different predicted probabilities for that sample
boot.i <- function(data, formula){
  boot.sample <- sample(1:nrow(data), nrow(data), replace = T) # 1. draw bootstrap sample
  data <- data[boot.sample, ] 
  model <- glm(formula, family = binomial, data = data) # 2. run the model
  beta <- coef(model) # 3. extract coefficient
  data <- model.matrix(model) # 4. extract corresponding model matrix
  # 5. get predicted probabilities 
  p.1 <- my.predict(data, beta, type = 1) 
  p.2 <- my.predict(data, beta, type = 2)
  outcomes <- c(mean(p.2) - mean(p.1))
  colnames <- c("Diff")
  return(outcomes)
}

## boot function for weighted model
boot.i.w <- function(data, formula){
  boot.sample <- sample(1:nrow(data), nrow(data), replace = T) # 1. draw bootstrap sample
  data <- data[boot.sample, ] 
  model <- glm(formula, family = binomial, data = data, weights = `(weights)`) # 2. run the model
  beta <- coef(model) # 3. extract coefficient
  data <- model.matrix(model) # 4. extract corresponding model matrix
  # 5. get predicted probabilities 
  p.1 <- my.predict(data, beta, type = 1) 
  p.2 <- my.predict(data, beta, type = 2)
  outcomes <- c(mean(p.2) - mean(p.1))
  colnames <- c("Diff")
  return(outcomes)
}

## Create table for storing values
probs <- matrix(NA, ncol = 3, nrow = 6)
colnames(probs) <- c("Diff", "Lower", "Upper") 
rownames(probs) <- c("Face-to-Face", "Phone", "Text Msg", "Satellite TV", 
                     "Twitter", "Facebook")

probs.w <- matrix(NA, ncol = 3, nrow = 6)
colnames(probs.w) <- c("Diff", "Lower", "Upper") 
rownames(probs.w) <- c("Face-to-Face", "Phone", "Text Msg", "Satellite TV", 
                       "Twitter", "Facebook")

## Phone use
## set two outcomes
format <- function(data, type){
  if(type==1){data[ ,c("phoneJan25")] <-0}
  else if (type ==2){data[ ,c("phoneJan25")] <-1
                     
  }else {data=data}
  return(data)
}

## calculate difference in unweighted probs
probs.phoneJan25 <- mean(my.predict(model.matrix(model), coef(model), 2)) - 
           mean(my.predict(model.matrix(model), coef(model), 1))
probs[2,1] <- probs.phoneJan25

boot.probs.phoneJan25 <- replicate(500, boot.i(model.frame(model), 
                                               formula(model))) # do bootstrap 500 times
boot.ci.phoneJan25 <- quantile(2 * probs.phoneJan25 - boot.probs.phoneJan25,
                               probs = c(.025, 0.975)) # get CI's
probs[2,2] <- boot.ci.phoneJan25[1]
probs[2,3] <- boot.ci.phoneJan25[2]
probs

## calculate difference in weighted probs
probs.w.phoneJan25 <- c(mean(my.predict(model.matrix(model.w), coef(model.w), 2)) -
             mean(my.predict(model.matrix(model.w), coef(model.w), 1)))
probs.w[2,1] <- probs.w.phoneJan25

boot.probs.w.phoneJan25 <- replicate(500, boot.i.w(model.frame(model.w), 
                                                 formula(model.w))) # do bootstrap 500 times
boot.ci.w.phoneJan25 <- quantile(2 * probs.w.phoneJan25 - boot.probs.w.phoneJan25,
                                 probs = c(.025, 0.975))
probs.w[2,2] <- boot.ci.w.phoneJan25[1]
probs.w[2,3] <- boot.ci.w.phoneJan25[2]
probs.w

## TV Use
## set two outcomes (pick covariate)
format <- function(data, type){
  if(type==1){data[ ,c("TVJan25")] <-0}
  else if (type ==2){data[ ,c("TVJan25")] <-1
                     
  }else {data=data}
  return(data)
}

## calculate difference in unweighted probs
probs.TVJan25 <- mean(my.predict(model.matrix(model), coef(model), 2)) - 
  mean(my.predict(model.matrix(model), coef(model), 1))
probs[4,1] <- probs.TVJan25

boot.probs.TVJan25 <- replicate(500, boot.i(model.frame(model), 
                                               formula(model))) # do bootstrap 500 times
boot.ci.TVJan25 <- quantile(2 * probs.TVJan25 - boot.probs.TVJan25,
                               probs = c(.025, 0.975)) # get CI's
probs[4,2] <- boot.ci.TVJan25[1]
probs[4,3] <- boot.ci.TVJan25[2]
probs

## calculate difference in weighted probs
probs.w.TVJan25 <- c(mean(my.predict(model.matrix(model.w), coef(model.w), 2)) -
                          mean(my.predict(model.matrix(model.w), coef(model.w), 1)))
probs.w[4,1] <- probs.w.TVJan25

boot.probs.w.TVJan25 <- replicate(500, boot.i.w(model.frame(model.w), 
                                                 formula(model.w))) # do bootstrap 500 times
boot.ci.w.TVJan25 <- quantile(2 * probs.w.TVJan25 - boot.probs.w.TVJan25,
                                 probs = c(.025, 0.975))
probs.w[4,2] <- boot.ci.w.TVJan25[1]
probs.w[4,3] <- boot.ci.w.TVJan25[2]
probs.w

##Text use
## set two outcomes (pick covariate)
format <- function(data, type){
  if(type==1){data[ ,c("textJan25")] <-0}
  else if (type ==2){data[ ,c("textJan25")] <-1
                     
  }else {data=data}
  return(data)
}

## calculate difference in unweighted probs
probs.textJan25 <- mean(my.predict(model.matrix(model), coef(model), 2)) - 
  mean(my.predict(model.matrix(model), coef(model), 1))
probs[3,1] <- probs.textJan25

boot.probs.textJan25 <- replicate(500, boot.i(model.frame(model), 
                                               formula(model))) # do bootstrap 500 times
boot.ci.textJan25 <- quantile(2 * probs.textJan25 - boot.probs.textJan25,
                               probs = c(.025, 0.975)) # get CI's
probs[3,2] <- boot.ci.textJan25[1]
probs[3,3] <- boot.ci.textJan25[2]
probs

## calculate difference in weighted probs
probs.w.textJan25 <- c(mean(my.predict(model.matrix(model.w), coef(model.w), 2)) -
                          mean(my.predict(model.matrix(model.w), coef(model.w), 1)))
probs.w[3,1] <- probs.w.textJan25

boot.probs.w.textJan25 <- replicate(500, boot.i.w(model.frame(model.w), 
                                                 formula(model.w))) # do bootstrap 500 times
boot.ci.w.textJan25 <- quantile(2 * probs.w.textJan25 - boot.probs.w.textJan25,
                                 probs = c(.025, 0.975))
probs.w[3,2] <- boot.ci.w.textJan25[1]
probs.w[3,3] <- boot.ci.w.textJan25[2]
probs.w

## Twitter use
## set two outcomes (pick covariate)
format <- function(data, type){
  if(type==1){data[ ,c("twitterJan25")] <-0}
  else if (type ==2){data[ ,c("twitterJan25")] <-1
                     
  }else {data=data}
  return(data)
}

## calculate difference in unweighted probs
probs.twitterJan25 <- mean(my.predict(model.matrix(model), coef(model), 2)) - 
  mean(my.predict(model.matrix(model), coef(model), 1))
probs[5,1] <- probs.twitterJan25

boot.probs.twitterJan25 <- replicate(500, boot.i(model.frame(model), 
                                               formula(model))) # do bootstrap 500 times
boot.ci.twitterJan25 <- quantile(2 * probs.twitterJan25 - boot.probs.twitterJan25,
                               probs = c(.025, 0.975)) # get CI's
probs[5,2] <- boot.ci.twitterJan25[1]
probs[5,3] <- boot.ci.twitterJan25[2]
probs

## calculate difference in weighted probs
probs.w.twitterJan25 <- c(mean(my.predict(model.matrix(model.w), coef(model.w), 2)) -
                          mean(my.predict(model.matrix(model.w), coef(model.w), 1)))
probs.w[5,1] <- probs.w.twitterJan25

boot.probs.w.twitterJan25 <- replicate(500, boot.i.w(model.frame(model.w), 
                                                 formula(model.w))) # do bootstrap 500 times
boot.ci.w.twitterJan25 <- quantile(2 * probs.w.twitterJan25 - boot.probs.w.twitterJan25,
                                 probs = c(.025, 0.975))
probs.w[5,2] <- boot.ci.w.twitterJan25[1]
probs.w[5,3] <- boot.ci.w.twitterJan25[2]
probs.w

## Facebook use
## set two outcomes (pick covariate)
format <- function(data, type){
  if(type==1){data[ ,c("facebookJan25")] <-0}
  else if (type ==2){data[ ,c("facebookJan25")] <-1
                     
  }else {data=data}
  return(data)
}

## calculate difference in unweighted probs
probs.facebookJan25 <- mean(my.predict(model.matrix(model), coef(model), 2)) - 
  mean(my.predict(model.matrix(model), coef(model), 1))
probs[6,1] <- probs.facebookJan25

boot.probs.facebookJan25 <- replicate(500, boot.i(model.frame(model), 
                                                 formula(model))) # do bootstrap 500 times
boot.ci.facebookJan25 <- quantile(2 * probs.facebookJan25 - boot.probs.facebookJan25,
                                 probs = c(.025, 0.975)) # get CI's
probs[6,2] <- boot.ci.facebookJan25[1]
probs[6,3] <- boot.ci.facebookJan25[2]
probs

## calculate difference in weighted probs
probs.w.facebookJan25 <- c(mean(my.predict(model.matrix(model.w), coef(model.w), 2)) -
                            mean(my.predict(model.matrix(model.w), coef(model.w), 1)))
probs.w[6,1] <- probs.w.facebookJan25

boot.probs.w.facebookJan25 <- replicate(500, boot.i.w(model.frame(model.w), 
                                                   formula(model.w))) # do bootstrap 500 times
boot.ci.w.facebookJan25 <- quantile(2 * probs.w.facebookJan25 - boot.probs.w.facebookJan25,
                                   probs = c(.025, 0.975))
probs.w[6,2] <- boot.ci.w.facebookJan25[1]
probs.w[6,3] <- boot.ci.w.facebookJan25[2]
probs.w

##F2F Use
## set two outcomes (pick covariate)
format <- function(data, type){
  if(type==1){data[ ,c("f2fJan25")] <-0}
  else if (type ==2){data[ ,c("f2fJan25")] <-1
                     
  }else {data=data}
  return(data)
}

## calculate difference in unweighted probs
probs.f2fJan25 <- mean(my.predict(model.matrix(model), coef(model), 2)) - 
  mean(my.predict(model.matrix(model), coef(model), 1))
probs[1,1] <- probs.f2fJan25

boot.probs.f2fJan25 <- replicate(500, boot.i(model.frame(model), 
                                                 formula(model))) # do bootstrap 500 times
boot.ci.f2fJan25 <- quantile(2 * probs.f2fJan25 - boot.probs.f2fJan25,
                                 probs = c(.025, 0.975)) # get CI's
probs[1,2] <- boot.ci.f2fJan25[1]
probs[1,3] <- boot.ci.f2fJan25[2]
probs

## calculate difference in weighted probs
probs.w.f2fJan25 <- c(mean(my.predict(model.matrix(model.w), coef(model.w), 2)) -
                            mean(my.predict(model.matrix(model.w), coef(model.w), 1)))
probs.w[1,1] <- probs.w.f2fJan25

boot.probs.w.f2fJan25 <- replicate(500, boot.i.w(model.frame(model.w), 
                                                   formula(model.w))) # do bootstrap 500 times
boot.ci.w.f2fJan25 <- quantile(2 * probs.w.f2fJan25 - boot.probs.w.f2fJan25,
                                   probs = c(.025, 0.975))
probs.w[1,2] <- boot.ci.w.f2fJan25[1]
probs.w[1,3] <- boot.ci.w.f2fJan25[2]
probs.w

## Convert to percentages
probs <- round(probs*100, 2)
probs.w <- round(probs.w*100, 2)

##########################################################
## 10. Produce Figure 4: Effect of Key Information Sources 
## on Probability of Day 1 Protest Participation among All Protesters
##########################################################

par(mar=c(5,7,4,4))
plot(probs[,1], 1:6, yaxt = "n", xlim = c(-30,30), ylim = c(0.5,6.5), col = "blue", 
     pch = 19, xlab = "Effect on Prob of Day 1 Protest", ylab = "")
axis(2, at = 1:6, labels = c("Face-to-Face", "Phone", "Text Msg", "Satellite TV",
                    "Twitter", "Facebook"), las = 2)
abline(v = 0, col = "black", lty = 3)
lines(y = c(1,1), x = c(probs[1,2], probs[1,3]), col = "blue", lty = 2)
lines(y = c(2,2), x = c(probs[2,2], probs[2,3]), col = "blue", lty = 2)
lines(y = c(3,3), x = c(probs[3,2], probs[3,3]), col = "blue", lty = 2)
lines(y = c(4,4), x = c(probs[4,2], probs[4,3]), col = "blue", lty = 2)
lines(y = c(5,5), x = c(probs[5,2], probs[5,3]), col = "blue", lty = 2)
lines(y = c(6,6), x = c(probs[6,2], probs[6,3]), col = "blue", lty = 2)
dev.off()

##########################################################
## 11. Produce Figure 7: Effect of Key Information Sources 
## on Probability of Day 1 Protest (Weighted Model)
##########################################################

par(mar=c(5,7,4,4))
plot(probs.w[,1], 1:6, yaxt = "n", xlim = c(-30,30), ylim = c(0.5,6.5), col = "blue", 
     pch = 19, xlab = "Effect on Prob of Day 1 Protest", ylab = "")
axis(2, at = 1:6, labels = c("Face-to-Face", "Phone", "Text Msg", "Satellite TV",
                             "Twitter", "Facebook"), las = 2)
abline(v = 0, col = "black", lty = 3)
lines(y = c(1,1), x = c(probs.w[1,2], probs.w[1,3]), col = "blue", lty = 2)
lines(y = c(2,2), x = c(probs.w[2,2], probs.w[2,3]), col = "blue", lty = 2)
lines(y = c(3,3), x = c(probs.w[3,2], probs.w[3,3]), col = "blue", lty = 2)
lines(y = c(4,4), x = c(probs.w[4,2], probs.w[4,3]), col = "blue", lty = 2)
lines(y = c(5,5), x = c(probs.w[5,2], probs.w[5,3]), col = "blue", lty = 2)
lines(y = c(6,6), x = c(probs.w[6,2], probs.w[6,3]), col = "blue", lty = 2)
dev.off()

##########################################################
## 12. Produce Table 3: Effect of Media Usage on Jan 25 Protest Participation
##########################################################

stargazer(model,model.w)

